/////
// paper_figures.do
// This file creates the figures in the main paper.
// The input files are individual_level.dta and localauthority_level.dta. 
/////

/////
// Figure 1: Distribution of salary scales across local authorities, 2004-2013. 
/////

* Load the individual-level records and collapse them to one row per
* salary scale; -contract- stores the number of records in _freq.
use "individual_level.dta", clear
contract salaryscale_final

* Figure 1: bar chart of record counts by salary scale. The dashed vertical
* line at 9.5 splits scales 1-9 from scales 10-20; each side is annotated
* near the top of the plot region.
local bar_rgb "82 82 82"
local ymax 35000
twoway bar _freq salaryscale_final if !(salaryscale_final == .), ///
	color("`bar_rgb'") barwidth(.8) ///
	title("") xtitle("Salary scale") ytitle("Frequency") ///
	xtick(1(1)20) xlabel(1(2)20) ///
	ytick(0(5000)`ymax') ylabel(0(10000)`ymax') yscale(range(0 `ymax')) ///
	xline(9.5, lpattern(shortdash)) ///
	text(34000 16 "10-20 managed by localities", size(small)) ///
	text(34000 4 "1-9 managed by Nairobi", size(small))

/////
// Figure 2: Clerk assignment by ethnic group in the council majority.
/////

* open local authority-level data
use "localauthority_level.dta", clear

* keep the identifiers plus the clerk / majority indicators and reshape to
* wide, giving one majority* and one clerketh* column per ethnic group
keep territory_level year ethnicity la_clerk clerketh majority
levelsof ethnicity, local(ETH)
reshape wide majority clerketh, i(territory_level year) j(ethnicity) string

* coeth_clerk<group> is 1 when the group has majority == 1, clerketh == 1 and
* the locality-year has la_clerk == 1; it is 0 in every other case
foreach i of local ETH {
	gen coeth_clerk`i' = (majority`i' == 1 & clerketh`i' == 1 & la_clerk == 1)
}

* reshape back to long (one row per locality-year-ethnic group)
reshape long majority clerketh coeth_clerk, i(territory_level year) j(ethnicity) string

* noncoeth_clerk is 1 when the locality-year has a clerk (la_clerk == 1) but
* coeth_clerk is 0 on this row; it is 0 in every other case
gen noncoeth_clerk = (la_clerk == 1 & coeth_clerk == 0)

* has_majority is the maximum of majority within each locality-year
egen has_majority = max(majority), by(territory_level year)

* save a wide copy of majority and clerketh so it can be merged back after
* the data are filtered down to one row per locality-year
preserve
keep territory_level year ethnicity majority clerketh
reshape wide majority clerketh, i(territory_level year) j(ethnicity) string
tempfile majclerk_ethwide
save `majclerk_ethwide'
restore

* filter the long data:
*   - rows with majority == 1 (locality-years with a majority; most cases)
*   - rows for locality-years without a majority where this row's group is
*     the clerk's group (la_clerk == 1 and clerketh == 1)
* The command is closed on its last line instead of ending in a line-join
* marker, so it no longer depends on the following line being blank.
* NOTE(review): rows kept by the second condition carry the clerk's group in
* ethnicity, so the "NO MAJORITY" relabel further down (which only touches
* empty strings) does not apply to them - confirm this is intended.
keep if majority == 1 | ///
	(has_majority == 0 & la_clerk == 1 & clerketh == 1)

* merge back in the majority and clerk ethnicities
merge 1:1 territory_level year using `majclerk_ethwide'
drop _merge

* any missing obs are those la / years where
	* there is (a) no majority and (b) no clerk
	* so make these var 0
foreach v in majority clerketh la_clerk coeth_clerk noncoeth_clerk has_majority {
	replace `v' = 0 if `v' == .
}

* rename ethnicity; locality-years without a kept row have an empty string
rename ethnicity council_eth
replace council_eth = "NO MAJORITY" if council_eth == ""

* create a factor version of council_eth
encode council_eth, gen(council_ethfac)

* create a clerk status variable (later replaces override earlier ones)
gen clerk_status = "Co-ethnic clerk" if coeth_clerk == 1
replace clerk_status = "Non co-ethnic clerk" if noncoeth_clerk == 1
replace clerk_status = "No clerk" if la_clerk == 0

* make clerk groups: no_clerk is the complement of la_clerk
gen no_clerk = 1 if la_clerk == 0
replace no_clerk = 0 if la_clerk == 1
summarize noncoeth_clerk coeth_clerk no_clerk

* create Figure 2
* Stacked bars, one per council-majority ethnic group, summing the three
* clerk indicators over locality-years. Rows labelled "NO MAJORITY" are
* left out. Bars are sorted in descending order of the first variable
* (coeth_clerk).
local c_dark  "82 82 82"
local c_mid   "150 150 150"
local c_light "204 204 204"
graph bar (sum) coeth_clerk noncoeth_clerk no_clerk if !(council_eth == "NO MAJORITY"), ///
	stack over(council_eth, sort(1) descending label(labsize(small) angle(30))) ///
	bar(1, color("`c_dark'")) bar(2, color("`c_mid'")) bar(3, color("`c_light'")) ///
	title("") ///
	ytitle("Number of locality-years", size(medsmall)) ///
	legend(title("Clerk status relative to majority ethnic group", size(4)) ///
		   order(1 "Co-ethnic clerk" 2 "Non-co-ethnic clerk" 3 "No clerk") rows(1) size(2.5) region(lwidth(none)) position("bottom")) ///
	note("Note: The groups descend from left to right by the number of locality-years with co-ethnic clerks.")

/////
// Figure 3: Hiring bias suggestive of patronage. 
/////

* open local authority-level data
use "localauthority_level.dta", clear

* Figure 3: bar of phiredw by majority status (over(majority)).
* Observations with phiredwmiss == 1 are excluded, and the remaining sample
* is trimmed to values strictly below its own 99th percentile (r(p99) from
* the -summarize, detail- call immediately above the graph command).
* The dashed horizontal line at 1 separates the two annotated regions.
* The lower annotation previously read "Underrepresentated"; the spelling
* is corrected here.
quietly summarize phiredw if phiredwmiss != 1, detail
graph bar phiredw if phiredw < `r(p99)' & phiredwmiss != 1, over(majority, gap(15)) ///
	asyvars bar(1, color("150 150 150")) bar(2, color("82 82 82"))  ///
	legend(order(1 "Ethnic groups non-co-ethnic with council majority" 2 "Ethnic groups co-ethnic with council majority") region(lwidth(none)) rows(2) size(medsmall) position(bottom)) ///
	yscale(range(0 2)) ylab(0(.25)2, nogrid) ///
	yline(1, lpattern(shortdash) lcolor(black)) ///
	title("") ytitle("") ///
	text(1.4 2.75 "Overrepresented" "in bureaucracy" "relative to" "local population", size(medsmall) placement(east)) ///
	text(.7 0 "Underrepresented" "in bureaucracy" "relative to" "local population", size(medsmall) placement(east)) ///
	caption("Note: The bias statistics are averaged across all ethnic group-locality-year combinations.", size(small))

/////
// Figure 4: Hiring bias suggestive of bureaucratic favoritism. 
/////

* open local authority-level data
use "localauthority_level.dta", clear

* create clerkgroup (left missing for any other la_clerk / clerketh combination)
gen clerkgroup = 1 if la_clerk == 0 & clerketh == 0  // local authorities with no clerk
replace clerkgroup = 2 if la_clerk == 1 & clerketh == 0 // local authorities with a non-coethnic clerk
replace clerkgroup = 3 if la_clerk == 1 & clerketh == 1 // local authorities with a coethnic clerk

* plot
* Figure 4: bar of phiredw by clerkgroup. Observations with phiredwmiss == 1
* are excluded and the sample is trimmed ONCE to values strictly below the
* 99th percentile of the remaining observations, the same rule Figure 3 uses.
* Earlier code dropped observations at that cutoff and then recomputed the
* percentile on the already-trimmed data, trimming the upper tail a second
* time; it also built a per-group mean variable that nothing used. Both are
* removed. The lower annotation's spelling ("Underrepresented") is corrected.
quietly summarize phiredw if phiredwmiss != 1, detail
graph bar phiredw if phiredw < `r(p99)' & phiredwmiss != 1, over(clerkgroup, gap(10)) ///
	asyvars bar(1, color("204 204 204")) bar(2, color("150 150 150")) bar(3, color("82 82 82")) ///
	legend(order(1 "Ethnic groups in localities with no clerk" ///
				 2 "Ethnic groups in localities with a non-co-ethnic clerk" ///
				 3 "Ethnic groups in localities with a co-ethnic clerk") size(medsmall) rows(3) region(lwidth(none)) position(bottom)) ///
	yscale(range(0 2)) ylab(0(.25)2, nogrid) ///
	yline(1, lpattern(shortdash) lcolor(black)) ///
	title("") ytitle("") ///
	text(1.4 2.75 "Overrepresented" "in bureaucracy" "relative to" "local population", size(medsmall) placement(east)) ///
	text(.7 0 "Underrepresented" "in bureaucracy" "relative to" "local population", size(medsmall) placement(east)) ///
	caption("Note: The bias statistics are averaged across all ethnic group-locality-year combinations.", size(small))

/////
// Figure 5: Linear combinations from Table 4, Columns 1 and 3
/////

* open local authority-level data
use "localauthority_level.dta", clear 

* Estimate, for each outcome perc<depvar>, two reghdfe specifications (m1, m2)
* of a four-way interaction of majority, la_clerk, clerketh and the outcome's
* missingness flag, with standard errors clustered on territory_level.
* For each specification the loop:
*   1. stores the fitted model as model_perc<depvar>_m#,
*   2. attaches fixed-effect flags and five linear combinations (estimate and
*      standard error) to the results with estadd,
*   3. re-expresses the same five combinations with nlcom, post and stores
*      them as perc<depvar>_m# (the names the coefplot call after the loop
*      refers to; only the _m1 sets are referenced there).
* define dependent variables
local depvar hired1020 hired19
foreach i of local depvar {
	
* model with ethnic group-locality and year fixed effects 
local indvar "c.majority##c.la_clerk##c.clerketh##c.perc`i'miss" 
	quietly reghdfe perc`i' `indvar', absorb(TERETHid year) vce(cluster territory_level) // ethnic group-locality and year fixed effects 
		est store model_perc`i'_m1
		* flags recording which fixed effects this specification absorbs
		estadd local TERETH "Yes"
		estadd local ETHYEAR "No"
		estadd local year "Yes"
		estadd local TERYEAR "No"
		* calculate linear combinations and other stats 	
			* no clerk and j is non co-ethnic with the council
			* (raw mean of the outcome in that cell, not a model estimate)
			summarize perc`i' if majority == 0 & la_clerk == 0 & clerketh == 0
			estadd scalar noclerk_majnoteth = r(mean)
			* no clerk and j is co-ethnic with the council
			* (the stored model is restored before every lincom so each
			*  combination is computed from model_perc<depvar>_m1)
			est restore model_perc`i'_m1
			lincom _b[majority]
			estadd scalar noclerk_majeth = r(estimate)
			estadd scalar noclerk_majeth_se = r(se)
			* j is non-coethnic with the clerk and the council
			est restore model_perc`i'_m1
			lincom _b[la_clerk] 
			estadd scalar clerknoteth_majnoteth = r(estimate)
			estadd scalar clerknoteth_majnoteth_se = r(se)
			* j is non co-ethnic with the clerk, but co-ethnic with the council
			est restore model_perc`i'_m1
			lincom  _b[majority] + _b[la_clerk] + _b[c.majority#c.la_clerk]
			estadd scalar clerknoteth_majeth = r(estimate)
			estadd scalar clerknoteth_majeth_se = r(se)
			* j is co-ethnic with the clerk, but not with the council
			est restore model_perc`i'_m1
			lincom _b[la_clerk] + _b[clerketh]
			estadd scalar clerketh_majnoteth = r(estimate)
			estadd scalar clerketh_majnoteth_se = r(se)
			* j is co-ethnic with the clerk and the council 
			est restore model_perc`i'_m1
			lincom _b[majority] + _b[la_clerk] + _b[c.majority#c.la_clerk] + _b[clerketh] + _b[c.majority#c.clerketh]
			estadd scalar clerketh_majeth = r(estimate)
			estadd scalar clerketh_majeth_se = r(se)
			* store other stats
			* outcome mean over the estimation sample, flagged via e(sample)
			gen used_d`i' = e(sample)
			sum perc`i' if used_d`i' == 1
			estadd scalar outcome_mean = r(mean)
			* estfe is given display labels for the fixed effects - presumably
			* for table output elsewhere; it is not used by the figure here
			estfe model_perc`i'_m1, labels(TERETHid "LA-ethnicity FE" year "Year FE" ETHYEARid "Ethnicity-year FE" TERYEARid "LA-year FE")
			* note: drops every variable whose name starts with "used"
			drop used*
			* recompute the five combinations with nlcom; the post option
			* makes them the active results, which are stored for plotting
			est restore model_perc`i'_m1
			nlcom (lincom1: _b[majority]) ///
				  (lincom2: _b[la_clerk]) ///
				  (lincom3: _b[majority] + _b[la_clerk] + _b[c.majority#c.la_clerk]) ///
				  (lincom4: _b[la_clerk] + _b[clerketh]) ///
				  (lincom5: _b[majority] + _b[la_clerk] + _b[c.majority#c.la_clerk] + _b[clerketh] + _b[c.majority#c.clerketh]), post
			est store perc`i'_m1
	
* model with ethnic group-locality, ethnic group-year, and locality-year fixed effects
* (same steps as above; only the absorbed effects, the flags and the _m2
*  suffix differ)
local indvar "c.majority##c.la_clerk##c.clerketh##c.perc`i'miss" 
	quietly reghdfe perc`i' `indvar', absorb(TERETHid ETHYEARid TERYEARid) vce(cluster territory_level) // ethnic group-year and locality-year fixed effects 
		est store model_perc`i'_m2
		* flags recording which fixed effects this specification absorbs
		estadd local TERETH "Yes"
		estadd local ETHYEAR "Yes"
		estadd local year "No"
		estadd local TERYEAR "Yes"
		* calculate linear combinations and other stats 	
			* no clerk and j is non co-ethnic with the council
			* (raw mean of the outcome in that cell, not a model estimate)
			summarize perc`i' if majority == 0 & la_clerk == 0 & clerketh == 0
			estadd scalar noclerk_majnoteth = r(mean)
			* no clerk and j is co-ethnic with the council
			est restore model_perc`i'_m2
			lincom _b[majority] 
			estadd scalar noclerk_majeth = r(estimate)
			estadd scalar noclerk_majeth_se = r(se)
			* j is non-coethnic with the clerk and the council
			est restore model_perc`i'_m2
			lincom _b[la_clerk] 
			estadd scalar clerknoteth_majnoteth = r(estimate)
			estadd scalar clerknoteth_majnoteth_se = r(se)
			* j is non co-ethnic with the clerk, but co-ethnic with the council
			est restore model_perc`i'_m2
			lincom  _b[majority] + _b[la_clerk] + _b[c.majority#c.la_clerk]
			estadd scalar clerknoteth_majeth = r(estimate)
			estadd scalar clerknoteth_majeth_se = r(se)
			* j is co-ethnic with the clerk, but not with the council
			est restore model_perc`i'_m2
			lincom _b[la_clerk] + _b[clerketh]
			estadd scalar clerketh_majnoteth = r(estimate)
			estadd scalar clerketh_majnoteth_se = r(se)
			* j is co-ethnic with the clerk and the council 
			est restore model_perc`i'_m2
			lincom _b[majority] + _b[la_clerk] + _b[c.majority#c.la_clerk] + _b[clerketh] + _b[c.majority#c.clerketh]
			estadd scalar clerketh_majeth = r(estimate)
			estadd scalar clerketh_majeth_se = r(se)
			* store other stats
			* outcome mean over the estimation sample, flagged via e(sample)
			gen used_d`i' = e(sample)
			sum perc`i' if used_d`i' == 1
			estadd scalar outcome_mean = r(mean)
			estfe model_perc`i'_m2, labels(TERETHid "LA-ethnicity FE" year "Year FE" ETHYEARid "Ethnicity-year FE" TERYEARid "LA-year FE")
			* note: drops every variable whose name starts with "used"
			drop used*
			* recompute the five combinations with nlcom, post and store them
			est restore model_perc`i'_m2
			nlcom (lincom1: _b[majority]) ///
				  (lincom2: _b[la_clerk]) ///
				  (lincom3: _b[majority] + _b[la_clerk] + _b[c.majority#c.la_clerk]) ///
				  (lincom4: _b[la_clerk] + _b[clerketh]) ///
				  (lincom5: _b[majority] + _b[la_clerk] + _b[c.majority#c.la_clerk] + _b[clerketh] + _b[c.majority#c.clerketh]), post
			est store perc`i'_m2
			
}

* plot
* Figure 5: coefficient plot of the five posted combinations (lincom1-lincom5)
* from perchired19_m1 (square markers) and perchired1020_m1 (circle markers).
* Each combination is drawn by two series per outcome: a black one shown when
* the confidence interval spans zero (@ll<0 & @ul>0) and a red one shown when
* it does not (@ll>0 | @ul<0).
* Offsets: every 1-9 series uses +0.05 and every 10-20 series uses -0.05 so
* the two salary groups sit side by side. The black 10-20 series previously
* used +0.05, which placed them on top of the 1-9 markers while the red
* 10-20 series sat at -0.05.
* NOTE(review): legend keys 2 and 22 presumably pick the marker of the first
* 1-9 series and of the first 10-20 series (each series contributing a CI key
* and a marker key) - confirm against the rendered legend.
coefplot (perchired19_m1, if(@ll<0 & @ul>0) keep(lincom1) offset(0.05) msymbol(S) mcolor(black) ciopts(lcol(black) lpatt(dash)) label("1-9 salary group")) ///
	 (perchired19_m1, if(@ll<0 & @ul>0) keep(lincom2) offset(0.05) msymbol(S) mcolor(black) ciopts(lcol(black) lpatt(dash)) label("1-9 salary group")) ///
	 (perchired19_m1, if(@ll<0 & @ul>0) keep(lincom3) offset(0.05) msymbol(S) mcolor(black) ciopts(lcol(black) lpatt(dash)) label("1-9 salary group")) ///
	 (perchired19_m1, if(@ll<0 & @ul>0) keep(lincom4) offset(0.05) msymbol(S) mcolor(black) ciopts(lcol(black) lpatt(dash)) label("1-9 salary group")) ///
	 (perchired19_m1, if(@ll<0 & @ul>0) keep(lincom5) offset(0.05) msymbol(S) mcolor(black) ciopts(lcol(black) lpatt(dash)) label("1-9 salary group")) ///
	 (perchired19_m1, if(@ll>0 | @ul<0) keep(lincom1) offset(0.05) msymbol(S) mcolor(red) ciopts(lcol(red) lpatt(dash)) label(""))  ///
	 (perchired19_m1, if(@ll>0 | @ul<0) keep(lincom2) offset(0.05) msymbol(S) mcolor(red) ciopts(lcol(red) lpatt(dash)) label(""))  ///
	 (perchired19_m1, if(@ll>0 | @ul<0) keep(lincom3) offset(0.05) msymbol(S) mcolor(red) ciopts(lcol(red) lpatt(dash)) label(""))  ///
	 (perchired19_m1, if(@ll>0 | @ul<0) keep(lincom4) offset(0.05) msymbol(S) mcolor(red) ciopts(lcol(red) lpatt(dash)) label(""))  ///
	 (perchired19_m1, if(@ll>0 | @ul<0) keep(lincom5) offset(0.05) msymbol(S) mcolor(red) ciopts(lcol(red) lpatt(dash)) label(""))  ///
	 (perchired1020_m1, if(@ll<0 & @ul>0) keep(lincom1) offset(-0.05) msymbol(O) mcolor(black) ciopts(lcol(black) lpatt(dash)) label("10-20 salary group")) ///
	 (perchired1020_m1, if(@ll<0 & @ul>0) keep(lincom2) offset(-0.05) msymbol(O) mcolor(black) ciopts(lcol(black) lpatt(dash)) label("10-20 salary group")) ///
	 (perchired1020_m1, if(@ll<0 & @ul>0) keep(lincom3) offset(-0.05) msymbol(O) mcolor(black) ciopts(lcol(black) lpatt(dash)) label("10-20 salary group")) ///
	 (perchired1020_m1, if(@ll<0 & @ul>0) keep(lincom4) offset(-0.05) msymbol(O) mcolor(black) ciopts(lcol(black) lpatt(dash)) label("10-20 salary group")) ///
	 (perchired1020_m1, if(@ll<0 & @ul>0) keep(lincom5) offset(-0.05) msymbol(O) mcolor(black) ciopts(lcol(black) lpatt(dash)) label("10-20 salary group")) ///
	 (perchired1020_m1, if(@ll>0 | @ul<0) keep(lincom1) offset(-0.05) msymbol(O) mcolor(red) ciopts(lcol(red) lpatt(dash)) label("")) ///
	 (perchired1020_m1, if(@ll>0 | @ul<0) keep(lincom2) offset(-0.05) msymbol(O) mcolor(red) ciopts(lcol(red) lpatt(dash)) label("")) ///
	 (perchired1020_m1, if(@ll>0 | @ul<0) keep(lincom3) offset(-0.05) msymbol(O) mcolor(red) ciopts(lcol(red) lpatt(dash)) label("")) ///
	 (perchired1020_m1, if(@ll>0 | @ul<0) keep(lincom4) offset(-0.05) msymbol(O) mcolor(red) ciopts(lcol(red) lpatt(dash)) label("")) ///
	 (perchired1020_m1, if(@ll>0 | @ul<0) keep(lincom5) offset(-0.05) msymbol(O) mcolor(red) ciopts(lcol(red) lpatt(dash)) label("")), ///
	 coeflabels(lincom1 = "No clerk and j is coethnic with the council" ///
				lincom2 = "j is non-co-ethnic with the clerk and the council" ///
				lincom3 = "j is non-co-ethnic with the clerk, but co-ethnic with the council" ///
				lincom4 = "j is co-ethnic with the clerk, but not with the council" ///
				lincom5 = "j is co-ethnic with the clerk and the council", wrap(32)) ///
	 xline(0) xlabel(-.2(.1).8) ///
	 legend(order(2 "1-9 salary group" 22 "10-20 salary group") region(lstyle(none))) ///
	 title("") xtitle("Estimate", margin(medsmall))








